# Standard packages
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.colors as colors
import matplotlib as mpl
import numpy as np
import pandas as pd
import os.path as path
import datetime as dt
import warnings
import seaborn as sb
import cosmicsense as cs
warnings.simplefilter('once', RuntimeWarning)
# Display figures inline
%matplotlib inline
# Display figures as interactive (requires kernel restart)
#%matplotlib
# Hourly precipitation depth recorded at DWD station 02290 (Hohenpeissenberg).
# DWD products are ";"-separated and flag missing values as -999.
prec_file = "/media/x/cosmicsense/data/fendt/dwd/produkt_rr_stunde_02290.txt"
prec = pd.read_csv(prec_file, sep=";", na_values=-999)
prec.columns = ["station_id", "datetime", "quality", "depth", "ind", "wrtr", "eor"]
prec["datetime"] = pd.to_datetime(prec["datetime"], format="%Y%m%d%H")
prec = prec.set_index("datetime")
# Hourly barometric pressure from the same DWD station (p: station level,
# p0: reduced pressure); -999 marks missing values.
press_file = "/media/x/cosmicsense/data/fendt/dwd/produkt_p0_stunde_02290.txt"
press = pd.read_csv(press_file, sep=";", na_values=-999)
press.columns = ["station_id", "datetime", "quality", "p", "p0", "eor"]
press["datetime"] = pd.to_datetime(press["datetime"], format="%Y%m%d%H")
press = press.set_index("datetime")
# On-site meteorological record of the Fendt site (DE-Fen), 2-m observations.
meteo_file = "/media/x/cosmicsense/data/fendt/meteo/2019_metDataDE-Fen.dat"
meteo_cols = ["datetime", "temp2m", "press2m", "relhum2m", "windsp2m",
              "winddir2m", "swdownrad2m", "lwdownrad2m", "precip"]
meteo = pd.read_csv(meteo_file, sep=",", skiprows=2, names=meteo_cols)
meteo["datetime"] = pd.to_datetime(meteo["datetime"])
meteo = meteo.set_index("datetime")
# Derive absolute humidity (g/m3) from air temperature and relative humidity.
meteo["abshum2m"] = cs.conv.absolute_humidity(meteo["temp2m"], meteo["relhum2m"])
# Overview figure: precipitation, barometric pressure, and temperature/humidity.
plt.rc('font', **{'size' : 12})
fig = plt.figure(figsize=(10, 10))
# Panel 1: cumulative precipitation, DWD station vs. on-site gauge.
ax = plt.subplot(311)
plt.plot(prec["2019-05-06":].index, prec["2019-05-06":].depth.cumsum(), label="Hohenpeißenberg")
plt.plot(meteo["2019-05-06":].index, meteo["2019-05-06":].precip.cumsum(), label="On site")
#prec["2019-05-06":].depth.cumsum().plot()
plt.grid()
ax.xaxis.set_major_formatter(mdates.DateFormatter('%m/%d'))
plt.title("Cumulative precipitation depth")
plt.legend()
plt.ylabel("mm")
# Remember the x-range of the first panel; it is re-applied at the very end.
xlim = ax.get_xlim()
# Panel 2: barometric pressure, DWD station vs. on-site sensor.
ax = plt.subplot(312)
plt.plot(press["2019-05-06":].index, press["2019-05-06":].p0, label="Hohenpeißenberg")
plt.plot(meteo["2019-05-06":].index, meteo["2019-05-06":].press2m, label="On site")
plt.grid()
ax.xaxis.set_major_formatter(mdates.DateFormatter('%m/%d'))
plt.title("Barometric pressure")
plt.ylabel("hPa")
# Panel 3: temperature and relative humidity on the left axis,
# absolute humidity (black) on a twinned right axis.
ax = plt.subplot(313)
#plt.plot(press["2019-05-06":].index, press["2019-05-06":].p0, label="Hohenpeißenberg")
templ = plt.plot(meteo["2019-05-06":].index, meteo["2019-05-06":].temp2m, label="Air temp. (deg C)")
rehumpl = plt.plot(meteo["2019-05-06":].index, meteo["2019-05-06":].relhum2m, label="Rel. hum. (%)")
plt.grid()
plt.ylabel("deg C, %")
ax2 = ax.twinx()
abshumpl = plt.plot(meteo["2019-05-06":].index, meteo["2019-05-06":].abshum2m, color="black", label="Abs. hum. (g/m3)")
# Merge line handles from both axes into a single legend on the left axis.
lns = templ+rehumpl+abshumpl
labs = [l.get_label() for l in lns]
ax.legend(lns, labs, loc="upper left")
ax.xaxis.set_major_formatter(mdates.DateFormatter('%m/%d'))
plt.title("Temperature and humidity (on site only)")
plt.ylabel("g / m3")
plt.tight_layout()
# Align the last panel with the x-range captured from the first one.
ax.set_xlim(xlim)
# CRNS probe ids deployed at the Fendt site, and the root of the merged files.
ids = [1, 2, 3, 4, 5, 6, 7, 8, 14, 16, 17, 18, 19, 21, 22, 23, 24, 25]
fpath = "/media/x/cosmicsense/data/fendt/crns/"
crns = {}
# Read each probe's merged level-0 file and convert counts to counts per hour.
# NOTE: loop variable renamed from `id` to avoid shadowing the builtin.
for probe in ids:
    fname = path.join(fpath, "%d/%d_CRNS_merge.txt" % (probe, probe))
    df = pd.read_csv(fname, sep="\t")
    df["datetime"] = pd.to_datetime(df["datetime"])
    df = df.set_index("datetime")
    if probe == 4:
        # Probe 4 (Lab C): both detector tubes are combined into one count rate.
        df["cph1"] = (df.counts1 + df.counts2) / cs.conv.s_to_h(df.nsecs1)
    else:
        df["cph1"] = df.counts1 / cs.conv.s_to_h(df.nsecs1)
    # Not every probe has a second detector tube: check the columns explicitly
    # instead of catching AttributeError from attribute-style access.
    if "counts2" in df.columns and "nsecs2" in df.columns:
        df["cph2"] = df.counts2 / cs.conv.s_to_h(df.nsecs2)
    print(probe, end=": ")
    print("%s to %s" % (df.index[0], df.index[-1]))
    crns[probe] = df
# Overall temporal coverage across all probes.
min_dtime = np.min([crns[key].index[0] for key in crns.keys()])
max_dtime = np.max([crns[key].index[-1] for key in crns.keys()])
print(min_dtime, "-", max_dtime)
Some probes are affected by spurious count rates. In order to keep useful parts of the signal, a pragmatic/heuristic filtering approach is applied which could be refined later: count rates outside a plausible range (mincph, maxcph) or measured over intervals shorter than mininterv are discarded; values deviating by more than a buffer fraction from the median count rate over a period of 24 hours are removed (first for spuriously large values, based on 6-hourly maxima, and then with the same approach for spuriously small values, based on 6-hourly minima).
# Heuristic per-probe filter settings:
#   mincph/maxcph - plausible range of hourly count rates (counts per hour),
#   mininterv     - minimum accepted counting interval (-9999 disables the check),
#   type          - sensor hardware type,
#   lut           - dominant land use around the probe.
pars = {
    1: {"mincph": 500, "maxcph": 1000, "mininterv": -9999, "type": "CRS 2000-B, Scn.", "lut": "forest/meadow"},
    2: {"mincph": 500, "maxcph": 1100, "mininterv": -9999, "type": "CRS 1000", "lut": "meadow"},
    3: {"mincph": 500, "maxcph": 1100, "mininterv": -9999, "type": "CRS 1000", "lut": "meadow"},
    4: {"mincph": 6000, "maxcph": 9500, "mininterv": -9999, "type": "Lab C", "lut": "meadow"},
    5: {"mincph": 800, "maxcph": 1500, "mininterv": -9999, "type": "CRS 1000-B", "lut": "meadow"},
    6: {"mincph": 800, "maxcph": 1500, "mininterv": -9999, "type": "CRS 1000-B", "lut": "meadow, forest close"},
    7: {"mincph": 1000, "maxcph": 1700, "mininterv": -9999, "type": "CRS 1000-B", "lut": "meadow"},
    8: {"mincph": 1300, "maxcph": 2500, "mininterv": -9999, "type": "CRS 2000-B", "lut": "meadow"},
    14: {"mincph": 800, "maxcph": 1500, "mininterv": -9999, "type": "CRS 2000", "lut": "forest"},
    16: {"mincph": 1500, "maxcph": 2500, "mininterv": -9999, "type": "CRS 2000-B", "lut": "meadow"},
    17: {"mincph": 1400, "maxcph": 2400, "mininterv": -9999, "type": "CRS 2000-B", "lut": "meadow"},
    18: {"mincph": 500, "maxcph": 1000, "mininterv": -9999, "type": "CRS 1000", "lut": "meadow"},
    19: {"mincph": 1300, "maxcph": 2300, "mininterv": -9999, "type": "CRS 2000-B", "lut": "forest"},
    21: {"mincph": 1400, "maxcph": 2300, "mininterv": -9999, "type": "CRS 2000-B", "lut": "meadow, forest close"},
    22: {"mincph": 1100, "maxcph": 2100, "mininterv": -9999, "type": "CRS 2000-B", "lut": "forest"},
    23: {"mincph": 1200, "maxcph": 2200, "mininterv": -9999, "type": "CRS 2000-B", "lut": "meadow, peat"},
    24: {"mincph": 1600, "maxcph": 2600, "mininterv": -9999, "type": "CRS 2000-B", "lut": "meadow"},
    25: {"mincph": 900, "maxcph": 1500, "mininterv": -9999, "type": "CRS 1000-B", "lut": "meadow"},
}
# Allowed relative deviation (fraction) from the 24-hour median count rate.
buffer = 0.075
# Despike each probe's count rate: clip to the plausibility bounds from `pars`,
# then remove values outside a tolerance envelope around the 24-h median.
for i, key in enumerate(crns.keys()):
    x = crns[key].cph1.copy()
    # Probe 1 is exempt from the fixed plausibility bounds (handled separately
    # during standardization below).
    if not key==1:
        x[x > pars[key]["maxcph"]] = np.nan
        x[x < pars[key]["mincph"]] = np.nan
        x[crns[key].nsecs1 < pars[key]["mininterv"]] = np.nan
    # Daily median of the (pre-clipped) count rate; basis of the envelope width.
    median24 = x.resample("24H").median()
    # Maxfilter: daily median of the 6-hourly maxima, widened by `buffer`.
    max6 = x.resample("6H").max()
    median24max6 = max6.resample("24H").median()
    maxfilter = np.array(median24max6 + buffer * median24)
    # Minfilter: daily median of the 6-hourly minima, widened by `buffer`.
    min6 = x.resample("6H").min()
    median24min6 = min6.resample("24H").median()
    minfilter = np.array(median24min6 - buffer * median24)
    # Resample filter to original time stamps
    crns[key]["cph1_maxfilter"] = np.interp(x.index, median24.index, maxfilter)
    crns[key]["cph1_minfilter"] = np.interp(x.index, median24.index, minfilter)
    # Fill gaps
    crns[key]["cph1_maxfilter"] = crns[key].cph1_maxfilter.interpolate()
    crns[key]["cph1_minfilter"] = crns[key].cph1_minfilter.interpolate()
    # Apply filter
    crns[key]["cph1_filtered"] = x
    if not key==1:
        crns[key].loc[crns[key].cph1 > crns[key].cph1_maxfilter, "cph1_filtered"] = np.nan
        crns[key].loc[crns[key].cph1 < crns[key].cph1_minfilter, "cph1_filtered"] = np.nan
# One panel per probe: raw rates (red dots), filter envelope (orange lines),
# and the retained/filtered rates (black dots).
plt.rc('font', **{'size' : 12})
fig, ax = plt.subplots(nrows=len(crns), figsize=(12,40))
xlim = min_dtime, max_dtime
for i, key in enumerate(crns.keys()):
    ax[i].plot(crns[key].index, crns[key].cph1, linestyle="None", marker=".", ms=1, color="red")
    ax[i].plot(crns[key].index, crns[key].cph1_maxfilter, linestyle="-", ms=0, color="orange")
    ax[i].plot(crns[key].index, crns[key].cph1_minfilter, linestyle="-", ms=0, color="orange")
    ax[i].plot(crns[key].index, crns[key].cph1_filtered, linestyle="None", marker=".", ms=1, color="black")
    ax[i].set_title(key)
    ax[i].set_xlabel("")
    ax[i].set_ylabel("cph")
    # Common x-range so panels are directly comparable.
    ax[i].set_xlim(xlim)
    #ax[i].set_ylim(pars[key]["mincph"], pars[key]["maxcph"]+200)
    ax[i].grid()
    #ax[i].legend()
plt.tight_layout()
# Put all probes on one common 20-minute grid. `nearest(limit=1)` picks the
# closest observation but at most one grid step away, so gaps stay gaps.
dtrange = pd.date_range('2019-05-01 00:00:00', max_dtime, freq="20T")
crns20 = pd.DataFrame(index=dtrange)
for key in crns:
    on_grid = crns[key].cph1_filtered.resample('20T').nearest(limit=1)
    crns20[key] = on_grid.reindex(dtrange)
pp = sb.pairplot(crns20)
# NMDB data
# Neutron monitor record from nmdb.eu; the string " null" (leading blank) marks
# missing values, "#" lines are comments.
nmdb = pd.read_csv("/media/x/cosmicsense/data/fendt/nmdb/nmdb.txt", sep=";",
                   comment="#", na_values=" null")
nmdb.datetime = pd.to_datetime(nmdb.datetime)
nmdb = nmdb.set_index("datetime")
# Plot both Jungfraujoch channels: JUNG on the left axis, JUNG1 on a twinned
# right axis (different count levels).
fig, ax1 = plt.subplots(figsize=(12, 5))
plt.plot(nmdb.index, nmdb.JUNG, "b-", label="JUNG")
plt.ylabel("counts / s")
plt.legend()
ax2 = ax1.twinx()
plt.plot(nmdb.index, nmdb.JUNG1, "r-", label="JUNG1")
plt.ylabel("counts / s")
leg = plt.legend(loc="lower right")
ax2.xaxis.set_major_formatter(mdates.DateFormatter('%m/%d'))
# Start the view slightly before the first CRNS observation.
plt.xlim(min_dtime-dt.timedelta(hours=1), None)
plt.title("Incoming neutron flux at Jungfraujoch")
Several literature references (e.g. Zreda et al. 2012, Schroen et al. 2015, Andreasen et al. 2017) suggest correcting for variations in incoming neutron flux based on cosmic-ray neutron monitors available through http://www.nmdb.eu/nest. The idea is to compute a simple scaling factor $f_i$ from the measured neutron intensity $I_m$ and an arbitrary reference intensity $I_{ref}$ that depends on the actual neutron monitor.
\begin{equation*} f_i = \frac{I_m}{I_{ref}} \end{equation*}
In the dissertation thesis of Schroen (2016), a reference value $I_{ref}$ of 150 cps is suggested for the monitor at Jungfraujoch (JUNG).
# Incoming-flux scaling factor: measured JUNG intensity over the 150 cps reference.
fi = nmdb["JUNG"] / 150.
fi.name = "fi"
# Hourly means, then forward-filled onto the common 20-minute grid.
fi = fi.resample("1H").mean()
fi = fi.resample("20T").ffill().reindex(dtrange)
Again based on Zreda et al. (2012), Andreasen et al. (2017) and many others, a correction factor $f_p$ is suggested in order to account for variations in barometric pressure.
\begin{equation*} f_p = exp\Bigl(\frac{p_0 - p}{L}\Bigr) \end{equation*}
Quoting from Andreasen et al. (2017):
[...] $L$ is the mass attenuation length for high-energy neutrons and is a function of cutoff rigidity (Desilets et al., 2006), $p$ is the barometric pressure at the time of measurement, and $P_0$ is an arbitrary reference pressure. Note that the units of $L$, $p$, and $p_0$ can be shielding depth (g/cm2) or pressure (Pa), where $1 g/cm2 = 98.0665 Pa$. If shielding depth is used, $L$ ranges from 130 g/cm2 at high latitudes to 144 g/cm2 at the equator (see Fig. 1).
Zreda et al. (2012) complement that
[... $p_0$] can be selected to be the long-term average pressure at the specific site, sea-level pressure, or the long-term average pressure at a different reference site.
How do we quantify $p_0$? Based on site average, or just based on standard sea level pressure (1013 mbar)?**
For now, we use $p_0 = 1013.25\ mbar = 101325\ Pa = 1033.23\ g/cm^2$ or the site average, and $L=131.6$ for Germany (Fig. 1 in Andreasen et al. (2017)).
# Barometric correction factor. Reference pressure is the long-term mean of the
# DWD record; L is the mass attenuation length for Germany.
p_0 = press["p0"].mean()
L = 131.6 # g/cm2
fp = cs.core.corrfact_baro(press["p0"], p_0, L)
fp = fp.resample("20T").ffill().reindex(dtrange)
fp.name = "fp"
# Alternative factor based on the on-site pressure record and its own mean.
fp2 = cs.core.corrfact_baro(meteo["press2m"], meteo["press2m"].mean(), L)
fp2 = fp2.resample("20T").mean()[dtrange[0]:]
fp2.name = "fp2"
# Visual comparison of both variants.
fp2.plot()
fp.plot()
In their overview, Andreasen et al. (2017) refer to Rosolem et al. (2013) when accounting for the effects of atmospheric water vapor:
\begin{equation*} f_{wv} = 1 + 0.0054 * (h - h_{ref}) \end{equation*}
where $h$ is the absolute humidity of the air (in g/m3), and $h_{ref}$ is the absolute humidity at an arbitrary reference time.
The references do not elaborate on how to obtain the absolute humidity, but given the relative humidity and air temperature, we typically obtain $h$ by combining
\begin{equation*} e = e_s * rh / 100. \end{equation*}
\begin{equation*} e_s(T) = 6.1094 * exp\Bigl(\frac{17.625*T}{T + 243.04}\Bigr) \end{equation*}
\begin{equation*} e * V = m * R_s * T \end{equation*}
# Water-vapor correction factor (Rosolem et al., 2013) from absolute humidity.
fwv = cs.core.corrfact_vapor_rosolem(meteo["abshum2m"])
fwv.name = "fwv"
We can now inspect the different correction factors, and use them to correct our neutron counts. According to Andreasen et al. (2017), this is done via
\begin{equation*} N_{cor} = \frac{N*f_{wv}}{f_p*f_i} \end{equation*}
# Apply the incoming-flux, pressure, and vapor corrections to every probe.
crns20c = crns20.copy()
for probe in ids:
    crns20c[probe] = crns20c[probe] * fwv / (fi * fp)
# Standardize all probes to the count level of probe 4 by scaling each series
# with the ratio of mean corrected count rates over a common calibration window.
crns20cst = crns20c.copy()
#crns6hcst = crns6hcst.drop(columns=["fi", "fp", "fwv"])
for i, id in enumerate(ids):
    if id==19:
        # NOTE(review): probe 19 uses a later window (May 22-29) than the default
        # (May 15-22) -- presumably no valid data in the earlier window; confirm.
        fact = np.nanmean(crns20c["2019-05-22":"2019-05-29"][4]) / np.nanmean(crns20c["2019-05-22":"2019-05-29"][id])
        crns20cst[id] = crns20c[id] * fact
    elif id==1:
        #continue
        # Probe 1: first remove an apparent level shift by scaling the part before
        # 2019-06-06 03:00 with the ratio of the post-shift (Jun 6-12) to the
        # pre-shift (May 22-29) mean, then scale the whole series to probe 4.
        fact2 = np.nanmean(crns20c["2019-06-06 12:00:00":"2019-06-12 00:00:00"][1]) / np.nanmean(crns20c["2019-05-22":"2019-05-29"][1])
        crns1 = crns20c[1].copy()
        crns1[:"2019-06-06 03:00:00"] *= fact2
        fact = np.nanmean(crns20c["2019-05-22":"2019-05-29"][4]) / np.nanmean(crns1["2019-05-22":"2019-05-29"])
        crns1 *= fact
        crns20cst[id] = crns1
    else:
        # Default: scale to probe 4 over May 15-22.
        fact = np.nanmean(crns20c["2019-05-15":"2019-05-22"][4]) / np.nanmean(crns20c["2019-05-15":"2019-05-22"][id])
        crns20cst[id] = crns20c[id] * fact
plt.rc('font', **{'size' : 12})
# One distinct color per probe. Renamed from `colors`, which shadowed the
# `matplotlib.colors` module imported at the top of the file.
palette = plt.cm.tab20(np.linspace(0,1,len(ids)))
fig, ax = plt.subplots(nrows=5, figsize=(12,20))

# Panel 1: standardized 20-minute count rates of all probes except probe 1,
# plus their ensemble mean (black).
plt.sca(ax[0])
for i, id in enumerate(ids[1:]):
    plt.plot(crns20cst.index, crns20cst[id], color=palette[i], lw=0.5)
plt.plot(crns20cst.index, crns20cst[ids[1:]].mean(axis=1), color="black")
plt.grid()
plt.title("Standardized count rates (20 minutes)")
ax[0].xaxis.set_major_formatter(mdates.DateFormatter('%m/%d'))
plt.xlim(min_dtime-dt.timedelta(hours=1), None)

# Panel 2: ensemble mean at several aggregation levels (6 h, 24 h, 24-h rolling),
# with cumulative precipitation overlaid on a twinned axis (dashed lines).
plt.sca(ax[1])
plt.plot(crns20cst.index, crns20cst[ids[1:]].mean(axis=1), lw=0.5, color="grey")
tmp = crns20cst[ids[1:]].mean(axis=1).resample("6H", closed="right", loffset=dt.timedelta(hours=3)).mean()
plt.plot(tmp.index, tmp, lw=2)
tmp = crns20cst[ids[1:]].mean(axis=1).resample("24H", closed="right", loffset=dt.timedelta(hours=12)).mean()
plt.plot(tmp.index, tmp, lw=2)
tmp = crns20cst[ids[1:]].mean(axis=1).rolling("24H").mean()
plt.plot(tmp.index-dt.timedelta(hours=12), tmp, lw=2, color="red")
plt.xlim(ax[0].get_xlim())
ax[1].xaxis.set_major_formatter(mdates.DateFormatter('%m/%d'))
plt.xlim(min_dtime-dt.timedelta(hours=1), None)
plt.ylim(6800, 7600)
plt.grid()
ax2 = ax[1].twinx()
plt.plot(prec["2019-05-01":].index, prec["2019-05-01":].depth.cumsum(), "--", color="black")
plt.plot(meteo["2019-05-01":].index, meteo["2019-05-01":].precip.cumsum(), "--", color="blue")
plt.title("Average over all standardized count rates (20 mins)")

# Panel 3: 6-hourly resampled count rates per probe, plotted at interval centers.
plt.sca(ax[2])
crns6hchst = crns20cst.resample("6H").mean()
for i, id in enumerate(ids[1:]):
    plt.plot(crns6hchst.index+dt.timedelta(hours=3), crns6hchst[id], color=palette[i])
plt.ylim(6500, 8000)
plt.grid()
# Fixed: this panel shows the 6-hour resample but was titled "24 hours".
plt.title("Resampled cph (6 hours)")
ax[2].xaxis.set_major_formatter(mdates.DateFormatter('%m/%d'))
plt.xlim(min_dtime-dt.timedelta(hours=1), None)

# Panel 4: daily resampled count rates per probe.
plt.sca(ax[3])
crns24hchst = crns20cst.resample("24H").mean()
for i, id in enumerate(ids[1:]):
    plt.plot(crns24hchst.index+dt.timedelta(hours=12), crns24hchst[id], color=palette[i])
plt.ylim(6500, 8000)
plt.grid()
plt.title("Resampled cph (24 hours)")
ax[3].xaxis.set_major_formatter(mdates.DateFormatter('%m/%d'))
plt.xlim(min_dtime-dt.timedelta(hours=1), None)

# Panel 5: cumulative precipitation at the DWD reference station.
plt.sca(ax[4])
plt.plot(prec["2019-05-01":].index, prec["2019-05-01":].depth.cumsum())
plt.xlim(ax[0].get_xlim())
ax[4].xaxis.set_major_formatter(mdates.DateFormatter('%m/%d'))
plt.xlim(min_dtime-dt.timedelta(hours=1), None)
plt.grid()
plt.title("Cumulative precipitation depth (Hohenpeißenberg)")
plt.tight_layout()
plt.rc('font', **{'size' : 12})
# Renamed from `colors` to avoid shadowing the `matplotlib.colors` import.
palette = plt.cm.tab20(np.linspace(0,1,len(ids)))
fig = plt.figure(figsize=(12,15))

# The original code referenced an undefined `crns6hcst` (stale state from an
# earlier notebook session); derive it here from the standardized 20-min series.
crns6hcst = crns20cst.resample("6H").mean()

# Panel 1: 6-hourly standardized count rates per probe (probe 1 excluded),
# plotted at the interval centers.
ax1 = fig.add_subplot(411)
for i, id in enumerate(ids):
    if id==1:
        continue
    ax1.plot(crns6hcst.index+dt.timedelta(hours=3), crns6hcst[id], color=palette[i])
ax1.legend(ncol=2)
plt.ylim(6500, 8000)
plt.grid()
plt.title("Resampled cph (6 hours)")
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%m/%d'))
plt.xlim(min_dtime-dt.timedelta(hours=1), None)

# Panel 2: daily count rates per probe.
ax = fig.add_subplot(412)
crns24chst = crns6hcst.resample("24H").mean()
for i, id in enumerate(ids):
    if id==1:
        continue
    ax.plot(crns24chst.index+dt.timedelta(hours=12), crns24chst[id], color=palette[i])
plt.ylim(6500, 8000)
plt.grid()
plt.title("Resampled cph (24 hours)")
ax.xaxis.set_major_formatter(mdates.DateFormatter('%m/%d'))
plt.xlim(min_dtime-dt.timedelta(hours=1), None)

# Panel 3: cumulative precipitation at the DWD reference station.
ax2 = fig.add_subplot(413)
ax2.plot(prec["2019-05-01":].index, prec["2019-05-01":].depth.cumsum())
ax2.set_xlim(ax1.get_xlim())
ax2.xaxis.set_major_formatter(mdates.DateFormatter('%m/%d'))
plt.xlim(min_dtime-dt.timedelta(hours=1), None)
plt.grid()
plt.title("Cumulative precipitation depth (Hohenpeißenberg)")

# Panel 4: the incoming-flux and pressure correction factors.
ax3 = fig.add_subplot(414)
ax3.plot(fi.index, fi, label="fi")
ax3.plot(fp.index, fp, label="fp")
ax3.set_xlim(ax1.get_xlim())
ax3.legend()
plt.grid()
plt.title("Correction factors")
ax3.xaxis.set_major_formatter(mdates.DateFormatter('%m/%d'))
plt.xlim(min_dtime-dt.timedelta(hours=1), None)
plt.tight_layout()
# Quick look at probe 4: raw count rate vs. 6-hourly aggregate.
# `crns6hcst` and `crns6h` were undefined leftovers of an earlier notebook
# state; they are derived here from the 20-minute series. NOTE(review):
# `crns6h` is presumably the 6-hourly mean of the corrected (non-standardized)
# counts -- verify against the original notebook history.
crns6hcst = crns20cst.resample("6H").mean()
crns6h = crns20c.resample("6H").mean()
crns6hcst[4].plot()
plt.figure(figsize=(15,8))
plt.plot(crns[4].cph1.index, crns[4].cph1, "bo", ms=1)
# Shift by 3 h to plot each 6-hour mean at its interval midpoint.
plt.plot(crns6h.index + dt.timedelta(hours=3), crns6h[4])
plt.grid()
plt.ylim(6000, 9000)